df <- as.data.frame(read.table('./acea-water-prediction/Aquifer_Petrignano.csv', header = TRUE, sep=','))
head(df, n=20)
## п.їDate Rainfall_Bastia_Umbra Depth_to_Groundwater_P24
## 1 14/03/2006 NA -22.48
## 2 15/03/2006 NA -22.38
## 3 16/03/2006 NA -22.25
## 4 17/03/2006 NA -22.38
## 5 18/03/2006 NA -22.60
## 6 19/03/2006 NA -22.35
## 7 20/03/2006 NA -22.50
## 8 21/03/2006 NA -22.31
## 9 22/03/2006 NA -22.31
## 10 23/03/2006 NA -22.40
## 11 24/03/2006 NA -22.32
## 12 25/03/2006 NA -22.25
## 13 26/03/2006 NA -22.15
## 14 27/03/2006 NA -22.47
## 15 28/03/2006 NA -22.27
## 16 29/03/2006 NA -22.52
## 17 30/03/2006 NA -22.50
## 18 31/03/2006 NA -22.70
## 19 01/04/2006 NA -22.77
## 20 02/04/2006 NA -22.49
## Depth_to_Groundwater_P25 Temperature_Bastia_Umbra Temperature_Petrignano
## 1 -22.18 NA NA
## 2 -22.14 NA NA
## 3 -22.04 NA NA
## 4 -22.04 NA NA
## 5 -22.04 NA NA
## 6 -21.95 NA NA
## 7 -21.99 NA NA
## 8 -21.89 NA NA
## 9 -21.82 NA NA
## 10 -21.89 NA NA
## 11 -21.89 NA NA
## 12 -21.82 NA NA
## 13 -21.79 NA NA
## 14 -21.83 NA NA
## 15 -21.75 NA NA
## 16 -21.81 NA NA
## 17 -21.84 NA NA
## 18 -21.87 NA NA
## 19 -21.90 NA NA
## 20 -21.83 NA NA
## Volume_C10_Petrignano Hydrometry_Fiume_Chiascio_Petrignano
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## 7 NA NA
## 8 NA NA
## 9 NA NA
## 10 NA NA
## 11 NA NA
## 12 NA NA
## 13 NA NA
## 14 NA NA
## 15 NA NA
## 16 NA NA
## 17 NA NA
## 18 NA NA
## 19 NA NA
## 20 NA NA
Remove NA elements from data
df <- df[complete.cases(df$Rainfall_Bastia_Umbra),]
rownames(df) <- 1:nrow(df)
head(df, n=20)
## п.їDate Rainfall_Bastia_Umbra Depth_to_Groundwater_P24
## 1 01/01/2009 0.0 -31.96
## 2 02/01/2009 0.0 -32.03
## 3 03/01/2009 0.0 -31.97
## 4 04/01/2009 0.0 -31.91
## 5 05/01/2009 0.0 -31.94
## 6 06/01/2009 0.0 -31.89
## 7 07/01/2009 0.0 -31.91
## 8 08/01/2009 0.0 -31.83
## 9 09/01/2009 0.9 -31.80
## 10 10/01/2009 0.0 -31.76
## 11 11/01/2009 0.0 -31.70
## 12 12/01/2009 0.0 -31.57
## 13 13/01/2009 1.1 -31.54
## 14 14/01/2009 0.0 -31.44
## 15 15/01/2009 0.0 -31.26
## 16 16/01/2009 0.0 -31.50
## 17 17/01/2009 0.0 -31.61
## 18 18/01/2009 0.1 -31.15
## 19 19/01/2009 0.1 -30.95
## 20 20/01/2009 0.0 -30.93
## Depth_to_Groundwater_P25 Temperature_Bastia_Umbra Temperature_Petrignano
## 1 -31.14 5.2 4.9
## 2 -31.11 2.3 2.5
## 3 -31.07 4.4 3.9
## 4 -31.05 0.8 0.8
## 5 -31.01 -1.9 -2.1
## 6 -31.00 -0.7 -0.7
## 7 -30.96 1.5 -0.3
## 8 -30.94 4.3 6.6
## 9 -30.93 4.9 4.8
## 10 -30.87 1.9 4.2
## 11 -30.83 3.4 4.5
## 12 -30.74 3.3 3.9
## 13 -30.63 6.0 6.3
## 14 -30.55 8.2 8.1
## 15 -30.57 7.8 7.1
## 16 -30.61 5.1 5.3
## 17 -30.68 1.6 1.6
## 18 -30.41 6.4 6.4
## 19 -30.28 10.3 10.3
## 20 -30.21 12.1 12.0
## Volume_C10_Petrignano Hydrometry_Fiume_Chiascio_Petrignano
## 1 -24530.69 2.4
## 2 -28785.89 2.5
## 3 -25766.21 2.4
## 4 -27919.30 2.4
## 5 -29854.66 2.3
## 6 -29124.58 2.3
## 7 -31173.12 2.3
## 8 -30232.22 2.4
## 9 -30597.70 2.3
## 10 -31337.28 2.3
## 11 -29845.15 2.3
## 12 -28745.28 2.3
## 13 -28932.77 2.3
## 14 -28600.13 2.3
## 15 -24973.06 2.3
## 16 -31388.26 2.3
## 17 -30941.57 2.3
## 18 -23043.74 2.3
## 19 -21658.75 2.3
## 20 -23793.70 2.3
df <- subset(df, select = -c(Depth_to_Groundwater_P25, Temperature_Petrignano ))
colnames(df) <- c("Date", "Rainfall", "Depth_to_Groundwater","Temperature", " Volume", " Hydrometry" )
head(df, n=20)
## Date Rainfall Depth_to_Groundwater Temperature Volume Hydrometry
## 1 01/01/2009 0.0 -31.96 5.2 -24530.69 2.4
## 2 02/01/2009 0.0 -32.03 2.3 -28785.89 2.5
## 3 03/01/2009 0.0 -31.97 4.4 -25766.21 2.4
## 4 04/01/2009 0.0 -31.91 0.8 -27919.30 2.4
## 5 05/01/2009 0.0 -31.94 -1.9 -29854.66 2.3
## 6 06/01/2009 0.0 -31.89 -0.7 -29124.58 2.3
## 7 07/01/2009 0.0 -31.91 1.5 -31173.12 2.3
## 8 08/01/2009 0.0 -31.83 4.3 -30232.22 2.4
## 9 09/01/2009 0.9 -31.80 4.9 -30597.70 2.3
## 10 10/01/2009 0.0 -31.76 1.9 -31337.28 2.3
## 11 11/01/2009 0.0 -31.70 3.4 -29845.15 2.3
## 12 12/01/2009 0.0 -31.57 3.3 -28745.28 2.3
## 13 13/01/2009 1.1 -31.54 6.0 -28932.77 2.3
## 14 14/01/2009 0.0 -31.44 8.2 -28600.13 2.3
## 15 15/01/2009 0.0 -31.26 7.8 -24973.06 2.3
## 16 16/01/2009 0.0 -31.50 5.1 -31388.26 2.3
## 17 17/01/2009 0.0 -31.61 1.6 -30941.57 2.3
## 18 18/01/2009 0.1 -31.15 6.4 -23043.74 2.3
## 19 19/01/2009 0.1 -30.95 10.3 -21658.75 2.3
## 20 20/01/2009 0.0 -30.93 12.1 -23793.70 2.3
Plot data
library('ggplot2')
library('forecast')
library('zoo')
library('dplyr')
library('data.table')
library('imputeTS')
library('xts')
library('tseries')
library('stats')
library('nlme')
library('fpp')
library('lubridate')
library('TSstudio')
# sort by date
df$Date <- as.Date(df$Date, format= "%d/%m/%Y")
df <- df[order(df$Date), ]
interval = df$Date - shift(df$Date, n=1, fill=NA, type="lag")
for(i in interval)
{
if(isTRUE(i > 1) || is.null(i))
{
print(i)
}
}
#depth <- diff(depth)
depth <- na_interpolation(df$Depth_to_Groundwater, option = "linear")
depth_ts <- ts(depth, start = df$Date[1], frequency=365)
# see that it is not statonary
adf.test(depth,alternative="stationary")
##
## Augmented Dickey-Fuller Test
##
## data: depth
## Dickey-Fuller = -1.8074, Lag order = 16, p-value = 0.6599
## alternative hypothesis: stationary
# box cox func
lambda <- BoxCox.lambda(depth_ts)
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
lambda
## [1] 1.999959
tmp <- BoxCox(depth_ts, lambda = lambda)
plot.ts(BoxCox(depth_ts, lambda = lambda))
# diff to get stationary
depth_ts <- diff(tmp)
# it is really stationary
adf.test(depth_ts,alternative="stationary")
## Warning in adf.test(depth_ts, alternative = "stationary"): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: depth_ts
## Dickey-Fuller = -11.798, Lag order = 16, p-value = 0.01
## alternative hypothesis: stationary
lambda=1.999959 plot decomposed data
ts_decompose(depth_ts)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
look at seasonal
ts_seasonal(depth_ts, type = "all")
## Warning: No trace type specified and no positional attributes specified
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
ts_plot(depth_ts)
we can make some decisions from the seasonalty.
As we can see the minimum value of the depth is in November,December.
rainfall <- na_interpolation(df$Rainfall, option = "linear")
rainfall_ts <- ts(rainfall, start = df$Date[1], frequency=12)
rainfall_ts <- diff(rainfall_ts)
ts_seasonal(rainfall_ts, type = "all")
temperature <- na_interpolation(df$Temperature, option = "linear")
temperature_ts <- ts(temperature, start = df$Date[1], frequency=12)
temperature_ts <- diff(temperature_ts)
ts_seasonal(temperature_ts, type = "all")
From the plot we can see that the maximum temperature was in August, minimum in December
volume <- na_interpolation(df$` Volume`, option = "linear")
volume_ts <- ts(volume, start = df$Date[1], frequency=12)
volume_ts <- diff(volume_ts)
ts_seasonal(volume_ts, type = "all")
Maximum volume was in March, minimum in December
hydrometry <- na_interpolation(df$` Hydrometry`, option = "linear")
hydrometry_ts <- ts(hydrometry, start = df$Date[1], frequency=12)
hydrometry_ts <- diff(hydrometry_ts)
ts_seasonal(hydrometry_ts, type = "all")
Maximum in August, minimum in January.
The volume and hydrometry reached their minimum around the same time
MAKE ARIMA FOR RESIDUALS
# for resisuals(noise)
h1 <- 1500 # the length of the testing partition
h2 <- 770
decomposed_ts <- decompose(depth_ts, type=c("additive"))
USgas_split <- ts_split(decomposed_ts$random, sample.out = h1)
train <- na_interpolation(USgas_split$train, option = "linear")
test <- na_interpolation(USgas_split$test, option = "linear")
ts_info(train)
## The train series is a ts object with 1 variable and 2698 observations
## Frequency: 365
## Start time: 14245 2
## End time: 14252 144
ts_info(test)
## The test series is a ts object with 1 variable and 1500 observations
## Frequency: 365
## Start time: 14252 145
## End time: 14256 184
acf(train)
pacf(train)
arima_1 <- Arima(train, order=c(3,1,1))
arima_1
## Series: train
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## -0.1262 -0.1635 -0.0635 -0.9380
## s.e. 0.0202 0.0199 0.0200 0.0066
##
## sigma^2 estimated as 8.113: log likelihood=-6649.29
## AIC=13308.59 AICc=13308.61 BIC=13338.09
tsdisplay(residuals(arima_1), lag.max = 20)
fc1 <- forecast(arima_1,h=h1)
accuracy(fc1, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.05881447 2.845701 2.029092 79.55234 165.7098 0.5855924
## Test set -0.19685650 3.113553 2.314139 101.19785 101.9243 0.6678566
## ACF1 Theil's U
## Training set -0.006850946 NA
## Test set -0.049299903 1.00239
test_forecast(forecast.obj = fc1, actual = depth_ts, test = test)